

********************************************************************************
********************************************************************************
*************************** THIS IS DO FILE 2 **********************************
****************** THIS FILE CREATES THE SUMMARY STATS  ************************
********************************************************************************
********************************************************************************

*Version: 2023 March 23


cap mkdir `"$output/log"'
cap mkdir `"$output/summary_stats"'
cap mkdir `"$output/summary_stats/parameters"'
cap mkdir `"$output/log/2_summary_stats"'
cap mkdir `"$output/summary_stats/data"'
cap mkdir `"$output/summary_stats/state"'
cap mkdir `"$output/summary_stats/variation"'
cap mkdir `"$output/paper_inputs"'

cap log close 
log using `"$output/log/2_summary_stats/2_summary_stats.smcl"', replace 
	
di "This run uses code version from 2023/03/29"


********************************************************************************
********************************************************************************
***************************** VISIT SUMMARY STATS ******************************
***************************** TABLE 1 AND TABLE 2 ******************************
********************************************************************************
********************************************************************************

use `"$output/data/$line_file"', clear


//payer 
gen medicare = 0 
gen medicaid = 0 
gen commercial = 0 
replace medicare = 1 if pay_type_pooled == 1 
replace medicaid = 1 if pay_type_pooled == 2 
replace commercial = 1 if pay_type_pooled == 3 


//at least one denial  
bys visit_billing_id: gegen denied_visit = min(Y_0)
replace denied_visit = 1-denied_visit

//number of line items 
bys visit_billing_id: gen n_line_items = _N 

//number of claims 
forval x = 1/6 {
	
	replace R_`x' = 0 if R_`x' == . 
}
	
egen R_sum = rowtotal(R_?) // sum resubmissions 
replace R_sum = R_sum + 1 //sum of resubmissions plus initial submission 
bys visit_billing_id: gegen max_claim = max(R_sum)

// visit allowed amt 
bys visit_billing_id: gegen tot_allowed_amt = sum(line_item_value)

//denied amt initial denial 
bys visit_billing_id: gegen denied_amt_tmp = sum(line_item_value) if Y_0 ==0
bys visit_billing_id: gegen denied_amt = min(denied_amt_tmp)
drop denied_amt_tmp
replace denied_amt = 0 if denied_amt == . 

//denied amt final 
egen Y_max = rowmax(Y_?)
assert Y_max == 0 | Y_max == 1
bys visit_billing_id: gegen denied_amt_tmp = sum(line_item_value) if Y_max ==0
bys visit_billing_id: gegen denied_amt_final = min(denied_amt_tmp)
drop denied_amt_tmp
replace denied_amt_final = 0 if denied_amt_final == . 

//generate share of reubmitted claims 
gen denied_line = 1-Y_0
bys pay_type_pooled: gegen denied_item = sum(denied_line)
bys pay_type_pooled: gegen one_resub_item = sum(R_1)
bys pay_type_pooled: gegen two_resub_item = sum(R_2)
bys pay_type_pooled: gegen three_resub_item = sum(R_3)
bys pay_type_pooled: gegen four_resub_item = sum(R_4)
bys pay_type_pooled: gegen five_resub_item = sum(R_5)

gen share_r1 = one_resub_item/denied_item
gen share_r2 = two_resub_item/denied_item
gen share_r3 = three_resub_item/denied_item
gen share_r4 = four_resub_item/denied_item
gen share_r5 = five_resub_item/denied_item


//keep at visit level 
bys visit_billing_id: keep if _n == 1 

//generate collected visit revenue
gen revenue = tot_allowed_amt - denied_amt_final

//generate variable labels 
lab var tot_allowed_amt "Initial claim value"
lab var n_line_items "Number of line items"
lab var max_claim "Number of submitted claims"
lab var denied_visit "Some items denied (=1)"
lab var denied_amt "Initial denied amount"
lab var denied_amt_final "Final denied amount"
lab var medicare "Medicare patient (0,1)"
lab var medicaid "Medicaid patient (0,1)"
lab var commercial "Private patient (0,1)"
lab var year "Year"
lab var revenue "Collected visit revenue"
lab var share_r1 "\multicolumn{1}{r}{Once}"
lab var share_r2 "\multicolumn{1}{r}{Twice}"
lab var share_r3 "\multicolumn{1}{r}{Three times}"
lab var share_r4 "\multicolumn{1}{r}{Four times}"
lab var share_r5 "\multicolumn{1}{r}{Five or more times}"

gen placeholder = .
lab var placeholder "\multicolumn{4}{l}{Share of denied claims resubmitted:}"



eststo clear 
// Table 1
estpost tabstat tot_allowed_amt n_line_items max_claim denied_visit denied_amt ///
	denied_amt_final medicare medicaid commercial, stat(mean sd p10 p90 n) columns(statistics) 

#delimit ;
	esttab using `"$paperinputs/visit_summary_stats_total.tex"', 
	cells("mean(fmt(2)) sd(fmt(2)) p10(fmt(2)) p90(fmt(2)) count(fmt(%20.0fc))") 
	unstack nonumbers booktabs nomtitles replace substitute(_ \_) label
	noobs collabels("Mean" "SD" "10th Percentile" "90th Percentile" "Observations")
	order(tot_allowed_amt n_line_items max_claim denied_visit denied_amt 
		  denied_amt_final medicare medicaid commercial);
#delimit cr 


eststo clear 
// Table 2 
foreach p in "Medicare" "Medicaid" "Commercial" {
	
	preserve
		keep if pay_type_pooled_st	== "`p'"

		estpost tabstat tot_allowed_amt denied_visit denied_amt denied_amt_final ///
			revenue placeholder share_r1 share_r2 share_r3 share_r4 share_r5, ///
			stat(mean) columns(statistics) 
			
		estimates save `"$output/summary_stats/parameters/sumstats_`p'.ster"', replace 
	restore
	
	est use `"$output/summary_stats/parameters/sumstats_`p'.ster"'
	eststo sumstats_`p'
	
}
	

#delimit ;
	esttab sumstats_Medicaid sumstats_Medicare sumstats_Commercial 
		using "$paperinputs/summary_payer.tex" ,cells("mean(fmt(3))") 
	unstack nonumbers booktabs replace substitute(_ \_) 
	noobs collabels(none) mtitles("Medicaid" "Medicare" "Commercial") label
	order(tot_allowed_amt denied_visit denied_amt denied_amt_final revenue);
#delimit cr 




********************************************************************************
********************************************************************************
************************ LINE LEVEL SUMMARY STATS  *****************************
********************************** TABLE 3 *************************************
********************************************************************************
********************************************************************************
clear all
use `"$output/data/$line_file"', clear

//generate denial indicator 
gen d_0 = 1-Y_0
gen d_1 = 1-Y_1
gen d_2 = 1-Y_2
gen d_3 = 1-Y_3
gen d_4 = 1-Y_4
gen d_5 = 1-Y_5


//generate reason code cateogry dummies 
gen dummy_adm = 0
gen dummy_cov = 0 
gen dummy_dup = 0 
gen dummy_info = 0 
gen dummy_cont = 0

replace dummy_adm = 1 if d_0 == 1 & visit_code_cat == "administrative"
replace dummy_cov = 1 if d_0 == 1 & visit_code_cat == "coverage"
replace dummy_dup = 1 if d_0 == 1 & visit_code_cat == "duplicate"
replace dummy_info = 1 if d_0 == 1 & visit_code_cat == "information"
replace dummy_cont = 1 if d_0 == 1 & visit_code_cat == "contractual"

label var dummy_adm "Administrative Denial Dummy"
label var dummy_cov "Coverage Denial Dummy" 
label var dummy_dup "Duplicate Denial Dummy"
label var dummy_info "Information Denial Dummy" 
label var dummy_cont "Contractual Adjustment Dummy" 

//generate dummies at the intersection of reason code categories and denial 
gen dummy_adm_d = 1 if d_0 == 1 & visit_code_cat == "administrative"
gen dummy_cov_d = 1 if d_0 == 1 & visit_code_cat == "coverage"
gen dummy_dup_d = 1 if d_0 == 1 & visit_code_cat == "duplicate"
gen dummy_info_d = 1 if d_0 == 1 & visit_code_cat == "information"
gen dummy_cont_d = 1 if d_0 == 1 & visit_code_cat == "contractual"

replace  dummy_adm_d = . if Y_0 == 1 
replace  dummy_cov_d = . if Y_0 == 1 
replace  dummy_dup_d = . if Y_0 == 1 
replace  dummy_info_d = . if Y_0 == 1 
replace  dummy_cont_d = . if Y_0 == 1 

replace  dummy_adm_d = 0 if Y_0 == 0 & dummy_adm_d == .
replace  dummy_cov_d = 0 if Y_0 == 0 & dummy_cov_d == .
replace  dummy_dup_d = 0 if Y_0 == 0 & dummy_dup_d == .
replace  dummy_info_d = 0 if Y_0 == 0 & dummy_info_d == .
replace  dummy_cont_d = 0 if Y_0 == 0 & dummy_cont_d == .


//generate shares of denials for each reason code category 
foreach var in dummy_adm_d dummy_cov_d dummy_dup_d dummy_info_d dummy_cont_d ///
	dummy_adm dummy_cov dummy_dup dummy_info dummy_cont {
	
	bys pay_type_pooled: gegen m_`var' = mean(`var')
	
}

//assert that denial shares sum to 1
egen denial_tot = rowtotal(m_dummy_adm_d m_dummy_cov_d m_dummy_dup_d m_dummy_info_d m_dummy_cont_d)
gen rd_denial_tot = round(denial_tot, 0.001)
assert rd_denial_tot == 1 
drop rd_denial_tot denial_tot

//one variable for the denial rate of the various reason code categories
gen denial_rate = m_dummy_adm if visit_code_cat == "administrative"
replace denial_rate = m_dummy_cov if visit_code_cat == "coverage"
replace denial_rate = m_dummy_dup if visit_code_cat == "duplicate"
replace denial_rate = m_dummy_info if visit_code_cat == "information"
replace denial_rate = m_dummy_cont if visit_code_cat == "contractual"

//one variable for the denial shares of the various reason code categories
gen denial_shares = m_dummy_adm_d if visit_code_cat == "administrative"
replace denial_shares = m_dummy_cov_d if visit_code_cat == "coverage"
replace denial_shares = m_dummy_dup_d if visit_code_cat == "duplicate"
replace denial_shares = m_dummy_info_d if visit_code_cat == "information"
replace denial_shares = m_dummy_cont_d if visit_code_cat == "contractual"
 

//gen variabe for resubmissions at the line level 
gen R_cond = R_1 if Y_0==0 

//count the number of resubmissions of a given line item 
gen resubmissions = .
forv k = 1/6 {
	replace resubmissions = `k' if R_`k'==1
	}
replace resubmissions = 0 if R_cond == 0 

//generate a variable indicating recovery of payment 
egen rec_rate = rowmax(Y_?)


//generate the amount that is initially denied 
gen amt_den = line_item_value if d_0 == 1 


//label variables
label var line_item_value "Allowed amount"
label var d_0 "Initial denial"
label var R_cond "First resubmission"
label var resubmissions "Future resubmission"
label var rec_rate "Recovery rate"

replace visit_code_cat = "Administrative" if visit_code_cat == "administrative"
replace visit_code_cat = "Coverage" if visit_code_cat == "coverage"
replace visit_code_cat = "Duplicate" if visit_code_cat == "duplicate"
replace visit_code_cat = "Information" if visit_code_cat == "information"
replace visit_code_cat = "Contractual" if visit_code_cat == "contractual"


// Table 3 
foreach p in "Medicare" "Medicaid" "Commercial" {
	
	preserve
		keep if pay_type_pooled_st	== "`p'"

		estpost tabstat denial_rate denial_shares amt_den R_cond /// 
			resubmissions rec_rate, by(visit_code_cat) stat(mean) nototal 	
			
		#delimit ;
		
			esttab using `"$paperinputs/summary_stats_reason_code_`p'.tex"', 
			cells("denial_rate(fmt(3)) denial_shares(fmt(3)) amt_den(fmt(2)) 
					R_cond(fmt(2)) resubmissions(fmt(2)) rec_rate(fmt(2))") 
			booktabs unstack nonumbers nomtitles replace substitute(_ \_) label
			noobs collabels("\shortstack{Denial \\ Rate}" 
							"\shortstack{Share of \\ Denials }" 
							"\shortstack{Mean \\ Line Item  Value}" 
							"\shortstack{Mean Pr. of \\ Resubmission}" 
							"\shortstack{Mean \# of \\Resubmissions}"  
							"\shortstack{Mean \\ Recovery Rate}") ;

		#delimit cr
	restore 
	
}

//Combine the tables into one table with three panels 
cap prog drop panelcombine
prog define panelcombine
qui {
syntax, use(str asis) paneltitles(str asis) columncount(integer) save(str asis) [CLEANup]
preserve

tokenize `"`paneltitles'"'
//read in loop
local num 1
while "``num''"~="" {
local panel`num'title="``num''"
local num=`num'+1
}


tokenize `use'
//read in loop
local num 1
while "``num''"~="" {
tempfile temp`num'
insheet using "``num''", clear
save `temp`num''
local max = `num'
local num=`num'+1
}

//conditional processing loop
local num 1
while "``num''"~="" {
local panellabel : word `num' of `c(alpha)'
use `temp`num'', clear
	if `num'==1 { //process first panel -- clip bottom
	drop if strpos(v1,"Note:")>0 | strpos(v1,"in parentheses")>0 | strpos(v1,"p<0")>0
	drop if v1=="\end{tabular}" | v1=="}"
	replace v1 = "\multicolumn{`columncount'}{c}{\textbf{Panel `panellabel': `panel1title'}}  \vspace{3pt} \\" if v1=="\midrule" & _n<8
	replace v1 = "" if v1=="\bottomrule" & _n>4 //this is intended to replace the bottom double line; more robust condition probably exists
	}
	else if `num'==`max' { //process final panel -- clip top
	//process header to drop everything until first hline
	g temp = (v1 == "\midrule")
	replace temp = temp+temp[_n-1] if _n>1
	drop if temp==0
	drop temp
	
	replace v1 = " \multicolumn{`columncount'}{c}{\textbf{Panel `panellabel': `panel`num'title'}} \vspace{3pt} \\" if _n==1
	}
	else { //process middle panels -- clip top and bottom
	//process header to drop everything until first hline
	g temp = (v1 == "\midrule")
	replace temp = temp+temp[_n-1] if _n>1
	drop if temp==0
	drop temp
	
	replace v1 = " \multicolumn{`columncount'}{c}{\textbf{Panel `panellabel': `panel`num'title'}} \vspace{3pt} \\" if _n==1
	drop if strpos(v1,"Note:")>0 | strpos(v1,"in parentheses")>0 | strpos(v1,"p<0")>0
	drop if v1=="\end{tabular}" | v1=="}"
	replace v1 = "" if v1=="\bottomrule"
	}
	save `temp`num'', replace
local num=`num'+1
}

use `temp1',clear
local num 2
while "``num''"~="" {
append using `temp`num''
local num=`num'+1
}

outsheet using `save', noname replace noquote


	if "`cleanup'"!="" { //erasure loop
	tokenize `use'
	local num 1
		while "``num''"~="" {
		erase "``num''"
		local num=`num'+1
		}
	}

restore
}
end


cd "$paperinputs"
panelcombine, use(summary_stats_reason_code_Medicaid.tex summary_stats_reason_code_Medicare.tex summary_stats_reason_code_Commercial.tex)  columncount(7) paneltitles("Medicaid" "Medicare" "Commercial") save(`"$paperinputs/summary_line.tex"') cleanup
cd "..\..\code\main_article"



********************************************************************************
********************************************************************************
********** DATA -- VARIATION IN VISIT AMOUNTS, DENIALS AND RESUBMISSIONS *******
********************************** FIGURE 2 ************************************
********************************************************************************
********************************************************************************
clear all
use `"$output/data/$line_file"', clear

//generate denial and recovery rate variables 
gen obs = 1 
gen D_0 = 1 - Y_0
egen rec_rate = rowmax(Y_1 Y_2 Y_3 Y_4 Y_5 Y_6)
lab var rec_rate "Line item recovery rate"

gen obs_resub = R_1

//collapse data at payer-procedure
gcollapse (mean) line_item_value D_0 R_1 rec_rate (count) obs (sum) obs_resub, ///
	by(pay_type_pooled proc1) 

save `"$output/summary_stats/variation/bined_variation.dta"', replace 



********************************************************************************
********************************************************************************
************************* SURVEY SUMMARY STATISTICS ****************************
******************************** TABLE 4****************************************
********************************************************************************
********************************************************************************

cap mkdir `"$output/summary_stats"'
cap mkdir `"$output/summary_stats/survey"'

use `"$raw_input/$x_state_file_regression"', clear 


//genreate group definition -- use only TIN info to connect groups
gen skaGroup3a=string(ID)
replace skaGroup3a="MDPPASTIN"+tin  if ~missing(tin)
 
cap drop group?
gegen group3 = group(skaGroup3a)
drop if missing(paystateid)


//cross state variable -- number of states a group is spanning 
cap drop totstates?	
sort year group3 paystateid
by year group3 paystateid: gen markstate=_n==1
by year group3: egen totstates3=sum(markstate)
drop markstate

//create group identifier that indicates groups we can identify with the tax ID
//spanning multiple states 
gen group = 0 
replace group = 1 if group3 !=. & totstates3 > 1 

//generate Medicare accept variable 
gen accept_medicare = 1 if medicare == "Y"
replace accept_medicare = 0 if medicare == "N"
bys npimedicare_id: gegen mean_medicare = mean(accept_medicare)
gen capacity = 1 - mean_medicare

//Medicaid conditional on Medicare 
gen medicaid_condmedicare = . 
replace medicaid_condmedicare =acceptinsurance if accept_medicare == 1 ///
	& ~missing(accept_medicare)


//Medicaid conditional on no Medicare 
gen medicaid_condnomedicare = . 
replace medicaid_condnomedicare =acceptinsurance if accept_medicare == 0 ///
	& ~missing(accept_medicare)


//variabl labels 
lab var acceptinsurance "\quad Medicaid (0,1)"
lab var accept_medicare "\quad Medicare (0,1)"
lab var medicaid_condmedicare "\quad Medicaid $\vert$ Doctor accepts Medicare"
lab var medicaid_condnomedicare "\quad Medicaid $\vert$ Doctor does not accept Medicare"
lab var group "Cross-state group (Tax ID; 0,1)"
lab var year "Year"
//generate placeholder 
gen placeholder = 1
gen placeholder1 = 1
 

cd `"$output/summary_stats/survey"'
 

eststo clear 
// Table 3, first part -- Medicare and Medicaid acceptance 
estpost tabstat acceptinsurance accept_medicare medicaid_condmedicare ///
	medicaid_condnomedicare, stat(mean sd p10 p90 n) columns(statistics) 

#delimit ;
	esttab using "table3_1.tex", label noobs
	cells("mean(fmt(3)) sd(fmt(2)) p10(fmt(2)) p90(fmt(2)) count(fmt(%20.0fc))")
	unstack nonumbers booktabs replace substitute(_ \_) nomtitles 
	collabels("Mean" "SD" "10th Percentile" "90th Percentile" "Observations") 
	order(acceptinsurance accept_medicare medicaid_condmedicare medicaid_condnomedicare);
#delimit cr 



// Table 3, second part -- group summery
estpost tabstat group, stat(mean sd p10 p90 n) columns(statistics) 

#delimit ;
	esttab using "table3_2.tex", 
	cells("mean(fmt(3)) sd(fmt(2)) p10(fmt(2)) p90(fmt(2)) count(fmt(%20.0fc))") 
	unstack nonumbers booktabs replace substitute(_ \_) nomtitles
	collabels("Mean" "SD" "10th Percentile" "90th Percentile" "Observations") 
	noobs label;
#delimit cr 


cd "..\..\..\..\code\main_article"

//generate the mover summary stats line 
use `"$raw_input/$full_mover_file"', clear 

gen mover=move


lab var mover "Cross-state mover (0,1)"

eststo clear 
// Table 3, third part --mover 
estpost tabstat mover, stat(mean sd p10 p90 n) columns(statistics) 
cd `"$output/summary_stats/survey"'

#delimit ;
		esttab using "table3_mover.tex", 
		cells("mean(fmt(3)) sd(fmt(2)) p10(fmt(2)) p90(fmt(2)) count(fmt(%20.0fc))") 
		unstack nonumbers booktabs replace substitute(_ \_) nomtitles  
		collabels("Mean" "SD" "10th Percentile" "90th Percentile" "Observations") 
		noobs label;
#delimit cr  





//Combine the tables into one tables with three panels 
cap prog drop panelcombine
prog define panelcombine
qui {
syntax, use(str asis) paneltitles(str asis) columncount(integer) save(str asis) [CLEANup]
preserve

tokenize `"`paneltitles'"'
//read in loop
local num 1
while "``num''"~="" {
local panel`num'title="``num''"
local num=`num'+1
}


tokenize `use'
//read in loop
local num 1
while "``num''"~="" {
tempfile temp`num'
insheet using "``num''", clear
save `temp`num''
local max = `num'
local num=`num'+1
}

//conditional processing loop
local num 1
while "``num''"~="" {
local panellabel : word `num' of `c(alpha)'
use `temp`num'', clear
	if `num'==1 { //process first panel -- clip bottom
	drop if strpos(v1,"Note:")>0 | strpos(v1,"in parentheses")>0 | strpos(v1,"p<0")>0
	drop if v1=="}" | v1=="\end{tabular}" | v1=="\bottomrule"
	replace v1 = "Physician accepts: \\" if v1=="placeholder         &            &            &            &            &            \\"
	replace v1 = "[0.5em]" if v1=="placeholder1        &            &            &            &            &            \\"
	}
	else if `num'==`max' { //process final panel -- clip top
	//process header to drop everything until first hline
	g temp = (v1 == "\midrule")
	replace temp = temp+temp[_n-1] if _n>1
	drop if temp==0
	drop temp
	drop if v1 == "\midrule"
	
	}
	else if `num'==2 { //process middle panels -- clip top and bottom
	//process header to drop everything until first hline
	g temp = (v1 == "\midrule")
	replace temp = temp+temp[_n-1] if _n>1
	drop if temp==0
	drop temp
	
	replace v1 = " \midrule" if _n==1
	drop if strpos(v1,"Note:")>0 | strpos(v1,"in parentheses")>0 | strpos(v1,"p<0")>0
	drop if v1=="\end{tabular}" | v1=="}"
	replace v1 = "" if v1=="\bottomrule"
	}
	
	else if `num'==3 { //process middle panels -- clip top and bottom
	//process header to drop everything until first hline
	g temp = (v1 == "\midrule")
	replace temp = temp+temp[_n-1] if _n>1
	drop if temp==0
	drop temp
	
	drop if strpos(v1,"Note:")>0 | strpos(v1,"in parentheses")>0 | strpos(v1,"p<0")>0
	drop if v1=="\end{tabular}" | v1=="}"
	replace v1 = "" if v1=="\bottomrule"
	drop if v1 == "\midrule" 
	}

	save `temp`num'', replace
local num=`num'+1
}

use `temp1',clear
local num 2
while "``num''"~="" {
append using `temp`num''
local num=`num'+1
}

outsheet using `save', noname replace noquote


	if "`cleanup'"!="" { //erasure loop
	tokenize `use'
	local num 1
		while "``num''"~="" {
		erase "``num''"
		local num=`num'+1
		}
	}

restore
}
end

panelcombine, use(table3_1.tex table3_mover.tex table3_2.tex)  columncount(6) ///
	paneltitles("") save(`"summary_survey.tex"') cleanup
copy "summary_survey.tex" "..\..\paper_inputs\summary_survey.tex", replace

cd "..\..\..\..\code\main_article"

log close 

